# -*- coding: utf-8 -*-
import scrapy
from copy import deepcopy
from scrapy_redis.spiders import RedisSpider


class DangdangSpider(RedisSpider):
    """Spider that walks the dangdang.com book category tree and scrapes book listings.

    Start URLs are not hard-coded; they are read from the Redis key named by
    ``redis_key`` (scrapy-redis ``RedisSpider`` behaviour).
    """

    name = 'dangdang'
    allowed_domains = ['dangdang.com']
    # Example start URL to push to the Redis key: http://book.dangdang.com/
    redis_key = "dangdang"

    def parse(self, response):
        """Extract the three-level category tree and request each leaf category.

        Yields one ``scrapy.Request`` per sub-category link, carrying a dict
        with the keys ``b_cate``, ``m_cate``, ``s_href`` and ``s_cate`` in
        ``meta["item"]``.
        """
        # Top-level categories.
        for div in response.xpath("//div[@class='con flq_body']/div"):
            # Top-level category name: all non-blank text fragments, stripped.
            b_cate = [
                text.strip()
                for text in div.xpath("./dl/dt//text()").extract()
                if text.strip()
            ]

            # Middle-level category groups.
            for dl in div.xpath("./div//dl[@class='inner_dl']"):
                # Middle-level category name: first non-blank text fragment.
                # Fall back to None instead of raising IndexError so a single
                # untitled group does not abort the whole category crawl.
                m_cate = next(
                    (
                        text.strip()
                        for text in dl.xpath("./dt//text()").extract()
                        if text.strip()
                    ),
                    None,
                )

                # Leaf (sub) categories.
                for a in dl.xpath("./dd/a"):
                    s_href = a.xpath("./@href").extract_first()
                    if s_href is None:
                        continue

                    # Build a fresh dict per request so no state is shared
                    # between the requests scheduled from this page.
                    item = {
                        "b_cate": list(b_cate),
                        "m_cate": m_cate,
                        "s_href": s_href,
                        "s_cate": a.xpath("./text()").extract_first(),
                    }
                    yield scrapy.Request(
                        # urljoin leaves absolute URLs untouched and resolves
                        # relative / protocol-relative hrefs against the page.
                        response.urljoin(s_href),
                        callback=self.parse_book_list,
                        meta={"item": item},
                    )

    def parse_book_list(self, response):
        """Scrape every book on a category listing page, then follow pagination.

        Yields one dict per book (the category fields from ``meta["item"]``
        plus the ``book_*`` fields) and, when a next-page link exists, a
        ``scrapy.Request`` for it handled by this same callback.
        """
        category = response.meta["item"]

        # Book entries on the listing page.
        for li in response.xpath("//ul[@class='bigimg']/li"):
            # Copy per book: yielding one shared dict would let later books
            # overwrite earlier ones for any consumer that keeps a reference.
            book = deepcopy(category)

            # Cover image URL. When ``src`` holds the placeholder image, the
            # URL is read from ``data-original`` instead.
            book["book_img"] = li.xpath("./a[@class='pic']/img/@src").extract_first()
            if book["book_img"] == "images/model/guan/url_none.png":
                book["book_img"] = li.xpath(
                    "./a[@class='pic']/img/@data-original"
                ).extract_first()

            # Book title.
            book["book_name"] = li.xpath("./p[@class='name']/a/@title").extract_first()
            # Book description.
            book["book_desc"] = li.xpath("./p[@class='detail']/text()").extract_first()
            # Book price.
            book["book_price"] = li.xpath(
                ".//span[@class='search_now_price']/text()"
            ).extract_first()
            # Book author(s) -- kept as a list, there may be several.
            book["book_author"] = li.xpath(
                "./p[@class='search_book_author']/span[1]/a/text()"
            ).extract()
            # Publication date.
            book["book_publish_date"] = li.xpath(
                "./p[@class='search_book_author']/span[2]/text()"
            ).extract_first()
            # Publisher.
            book["book_press"] = li.xpath(
                "./p[@class='search_book_author']/span[3]/a/text()"
            ).extract_first()

            yield book

        # Next page of the listing, if any.
        next_url = response.xpath("//li[@class='next']/a/@href").extract_first()
        if next_url is not None:
            yield scrapy.Request(
                # Resolve against the current page rather than prepending a
                # hard-coded host, so absolute hrefs and https both work.
                response.urljoin(next_url),
                callback=self.parse_book_list,
                # ``category`` is never mutated here, so the next page gets
                # only the category fields, not the last book's data.
                meta={"item": category},
            )